# CODE TO REPLICATE TABLE 1 IN MAIN TEXT OF ARTICLE ""
# Run this script in a fresh R session (e.g. via `Rscript`, or restart R first)
# rather than clearing the workspace from inside the script: rm(list = ls())
# only removes global objects -- attached packages and options persist -- and it
# silently wipes the workspace of anyone who sources this file. Every object
# used below is created by this script before it is read.
# Install the table-export packages once if needed:
#install.packages("officer")
#install.packages("flextable")
library(flextable)
library(officer)
library(tidyr)

# Load dataset
validation_data <- read.csv("Data_Validation_AMOS_public.csv")

# Flag rows where `surv.age` equals `vf.age` (NA when either value is missing).
validation_data$age_match <- validation_data$surv.age == validation_data$vf.age

# Numbers in parentheses, by row (quality type) -- Type=1: High, Type=2: Medium,
# Type=3: Low. The factor levels pin the count order to Low, Medium, High so the
# counts always line up with the labels assigned below; a bare table() drops
# absent types and sorts ascending, which would break or shift the labelling.
type_counts <- table(factor(validation_data$Type, levels = c(3, 2, 1)))
totals_parentheses <- c(nrow(validation_data), as.vector(type_counts))
names(totals_parentheses) <- c(
  "All observations, N=",
  "Low quality matches, N=",
  "Medium quality matches, N=",
  "High quality matches, N="
)

# Build Table

# Percentage (rounded to whole percent) of rows whose `surv.male` equals
# `vf.male`, among the rows selected by the logical vector `rows`.
# Rows where either value (or the selector) is NA are ignored. Comparing the
# two columns directly avoids relying on the cross-tabulation being square
# with identically ordered levels, which sum(diag(table(...))) requires.
sex_match_pct <- function(rows = TRUE) {
  agrees <- validation_data$surv.male[rows] == validation_data$vf.male[rows]
  round(mean(agrees, na.rm = TRUE), 2) * 100
}

# Row groups in display order; position i pairs with totals_parentheses[i].
sex_groups <- list(
  "All observations" = TRUE,
  "Low quality matches" = validation_data$Type == 3,
  "Medium quality matches" = validation_data$Type == 2,
  "High quality matches" = validation_data$Type == 1
)

# Two table rows per group: label + percentage, then "(N=...)" + blank cell.
Column12_ExactMatch_Sex <- do.call(
  rbind,
  lapply(seq_along(sex_groups), function(i) {
    rbind(
      c(names(sex_groups)[i], sex_match_pct(sex_groups[[i]])),
      c(paste0("(N=", totals_parentheses[i], ")"), "")
    )
  })
)
colnames(Column12_ExactMatch_Sex) <- c("Sex", "Exact Match")
  


# Percentage (rounded to whole percent) of TRUE values in `age_match` among the
# rows selected by the logical vector `rows`; NA values are ignored.
# Taking the mean of the logical flag is used instead of picking the second
# cell of table(age_match): when a group matches entirely (or not at all) the
# table has a single cell and positional indexing returns NA.
birth_year_match_pct <- function(rows = TRUE) {
  round(mean(validation_data$age_match[rows], na.rm = TRUE), 2) * 100
}

# Row groups in display order; position i pairs with totals_parentheses[i].
birth_year_groups <- list(
  "All observations" = TRUE,
  "Low quality matches" = validation_data$Type == 3,
  "Medium quality matches" = validation_data$Type == 2,
  "High quality matches" = validation_data$Type == 1
)

# Two table rows per group: label + percentage, then "(N=...)" + blank cell.
Column34_ExactMatch_BirthYear <- do.call(
  rbind,
  lapply(seq_along(birth_year_groups), function(i) {
    rbind(
      c(names(birth_year_groups)[i],
        birth_year_match_pct(birth_year_groups[[i]])),
      c(paste0("(N=", totals_parentheses[i], ")"), "")
    )
  })
)
colnames(Column34_ExactMatch_BirthYear) <- c("Birth Year", "Exact Match")


# Combine the sex and birth-year panels side by side (character matrix).
table1 <- cbind(Column12_ExactMatch_Sex, Column34_ExactMatch_BirthYear)

# data.frame() rewrites the spaced / duplicated matrix column names into unique
# syntactic ones (e.g. "Exact.Match", "Exact.Match.1"); flextable uses those as
# column keys. Map each key back to the intended header so the printed table
# shows "Exact Match" / "Birth Year" rather than the mangled names.
table1_df <- data.frame(table1)
header_labels <- setNames(as.list(colnames(table1)), names(table1_df))
ft <- set_header_labels(flextable(table1_df), values = header_labels)

# Create Word document and add table
doc <- read_docx() %>%
  body_add_par("Table 1: The Quality of Matches Between the Survey and Matches from the Database", style = "heading 1") %>%
  body_add_flextable(ft)

# Save the file (written once; the original wrote the same file twice)
print(doc, target = "Table1.docx")

